# Scratch arithmetic from an interactive session; each expression only prints
# its value, nothing is stored.
56 + 25 + 32.5
85 + 32.5
88 + 25
# The history also contained `36=14`, a typo for the sum below. R parses that
# as an assignment to the constant 36 and raises an invalid-assignment error,
# so the line is dropped.
36 + 14
# Help-page lookups typed at the console; they display documentation and
# create no objects.
?hist
# NOTE(review): "reg" does not look like a standard help topic -- presumably a
# mistyped search; confirm or remove.
?reg
?lm
?order
?sum
?barplot
# Binomial coefficient "1e6 choose 100"; the value is printed, not stored.
choose(1e6, 100)
# Fix the RNG seed so the random draws that follow are reproducible.
set.seed(94304)
## Standard error of a sample mean: sqrt(sample variance / sample size).
## `vec` is the numeric sample; the value returned is a single number.
calc.se <- function(vec) {
  n.obs <- length(vec)
  sqrt(var(vec) / n.obs)
}
## Critical value for a two-sided t test with alpha = 0.1
## (5% of the density in each tail), 99 degrees of freedom.
critical.val <- qt(0.95, df = 99)
results <- vector(length = 10000)
for (i in seq_len(10000)) {
  # 10 independent variables, 100 standard-normal draws each (one per column);
  # every true mean is 0, so any rejection is a false positive
  ys <- replicate(10, rnorm(n = 100))
  # standard error of each column mean, via calc.se
  ses <- apply(ys, 2, calc.se)
  # observed t statistics: column mean divided by its standard error
  t.stats <- colMeans(ys) / ses
  # record 1 when the most extreme of the 10 statistics rejects, else 0
  results[i] <- if (max(abs(t.stats)) > critical.val) 1 else 0
}
# Share of simulations with at least one rejection. With alpha = 0.1 a single
# test would reject about 10% of the time under the null; testing 10 variables
# at once pushes the rate far above that.
sum(results) / length(results)
# Draw one simulated data set at top level and print it for inspection:
# replicate() calls rnorm(n=100) ten times and binds the results column-wise.
ys <- replicate(10, rnorm(n=100))
ys
# Per-column standard errors, using calc.se as defined earlier in this file.
ses <- apply(ys, 2, calc.se)
ses
# NOTE(review): neither `slope` nor `se` is assigned anywhere in the visible
# file, so this line presumably fails unless both already exist in the
# session. It also creates a variable named `t`, the same name as base::t().
t <- slope / se
set.seed(94304)
## Standard error of the mean of `vec`: square root of the sample variance
## divided by the number of observations.
calc.se <- function(vec) {
  variance.of.mean <- var(vec) / length(vec)
  sqrt(variance.of.mean)
}
# Familywise false-positive simulation, entered twice in a row.
# The second pass below is a verbatim repeat of the first: it overwrites
# `critical.val` and `results`, and because the seed is not reset between the
# passes it continues from wherever the first pass left the RNG stream.
# Only the second pass feeds the proportion printed at the end.

# Pass 1 -------------------------------------------------------------------
# 0.95 quantile of the t distribution with 99 df, compared against |t| below.
critical.val <- qt(.95, df=99)
# vector(length=) creates a logical vector; it becomes numeric once 1/0 are stored.
results <- vector(length=10000)
for (i in 1:10000) {
# sample 10 standard normals
# note that true means are all 0
ys <- replicate(10, rnorm(n=100))
# get SEs for sample means using my fn
ses <- apply(ys, 2, calc.se)
# calc observed t stats; mean of each of the 10 vars divided by their SEs
t.stats <- colMeans(ys) / ses
# at least one reject occurs if most extreme t.stat exceeds critical val
results[i] <- ifelse(max(abs(t.stats)) > critical.val, 1, 0)
}
# Pass 2 (identical code, fresh random draws) ------------------------------
critical.val <- qt(.95, df=99)
results <- vector(length=10000)
for (i in 1:10000) {
# sample 10 standard normals
# note that true means are all 0
ys <- replicate(10, rnorm(n=100))
# get SEs for sample means using my fn
ses <- apply(ys, 2, calc.se)
# calc observed t stats; mean of each of the 10 vars divided by their SEs
t.stats <- colMeans(ys) / ses
# at least one reject occurs if most extreme t.stat exceeds critical val
results[i] <- ifelse(max(abs(t.stats)) > critical.val, 1, 0)
}
# Proportion of the 10000 simulations in which at least one test rejected.
sum(results)/10000
library(xtable)
?model.matrix
# Install the packages that plm and lme4 depend on.
# The history repeated the same install.packages(deps) call up to five times
# per package and re-fetched the repository index before each lookup; one
# index fetch and one install per package do the same work.
db <- available.packages()
deps <- tools::package_dependencies("plm", db)$plm
deps
install.packages(deps)
deps <- tools::package_dependencies("lme4", db)$lme4
deps
install.packages(deps)
# Exploratory look at vote shares in the election data.
# fraud.Rdata is expected to supply the `russia2011` data frame used below.
load("fraud.Rdata")
head(russia2011)
# fractions() lives in MASS, so attach it before the first call (the history
# called fractions() first, which fails when MASS is not attached).
library(MASS)
# Vote share: votes divided by turnout. Created before anything reads
# russia2011$rvote (the history first used the column before creating it).
russia2011$rvote <- russia2011$votes / russia2011$turnout
# Vote shares as rational approximations. Note the variable deliberately keeps
# the history's name `fractions`, which shadows the MASS function as a value.
fractions <- (fractions(russia2011$votes / russia2011$turnout))
# Labels of the ten most frequent vote-share values.
names(sort(summary(as.factor(c(fractions))), decreasing = TRUE)[1:10])
?hist
# Distinct vote shares rounded to three decimals, with 0 prepended.
c(0, sort(unique(round(russia2011$rvote, 3))))
length(unique(russia2011$rvote))
# One histogram break at every distinct vote share, starting from 0.
plot_breaks <- c(0, sort(unique(russia2011$rvote)))
hist(russia2011$rvote, plot_breaks)
length(plot_breaks)
rm(list=ls())
# fraud.Rdata is expected to supply the `russia2011` data frame used below.
load("fraud.Rdata")
# Vote share: votes divided by turnout.
russia2011$rvote <- russia2011$votes / russia2011$turnout
# Ten most frequent exact vote-share values.
sort(table(russia2011$rvote), decreasing = TRUE)[1:10]
hist(russia2011$rvote, freq = FALSE, ylim = c(0, 200),
     breaks = length(unique(russia2011$rvote)))
# Simulate b elections: draw turnout from a binomial with each row's observed
# turnout rate, then draw votes from a binomial with its observed vote share.
b <- 1000
n.rows <- nrow(russia2011)
turnout.rate <- russia2011$turnout / russia2011$N
simulated.votes <- matrix(nrow = b, ncol = n.rows)
for (i in seq_len(b)) {
  turnout <- rbinom(n.rows, size = russia2011$N, prob = turnout.rate)
  simulated.votes[i, ] <- rbinom(n.rows, size = turnout, prob = russia2011$rvote) / turnout
  # a simulated turnout of 0 gives 0/0 = NaN; record those shares as 0
  simulated.votes[i, is.na(simulated.votes[i, ])] <- 0
}
# Number of rows with a share of exactly 0.5 in the first simulated election.
table(simulated.votes[1, ] == 0.5)["TRUE"]
# Console scratch work: floor() checks followed by a long run of ad-hoc sums.
# Every expression only prints its value; nothing is assigned.
floor(2.4)
floor(0.5)
floor(0.6)
79+5
84+11
86+4+4
78+5+2
85+9
77+7+4
76+7+7+4
16+15
26+15
16+15
49+12
61+12+10
61+12+10
9+9
28+9+5
69+6+4+8+8
109+4
89+2+4
95+9
93+7+3
82+9
16+5
21+12
53+12+4
69+6+10
64+12
86+3+4
12+20
79+6+4+7
46+12
58+4
68+7+4+10
89+7
9+15
34+9+8
61+4.5
77.5+6
74+7
81+12+10
14+20+15
79+5
84+6+4
11+6
17+10+8
67+6
73+9+3+1
84+5
9+11
66+7
94+4
36+9
51+5
59+5
64+12+10
75+7
92+8+3
36+9+5
75+7
86+6+10
85+6+4
95+9+10
57-0.5
56.5+12
68.5+7+9+10
9+5+10
64+12
76+21
97+7
74+8+10
14+20+10
64+13
77+6+10
93+6+4
48+6+6+4+10
36+13
79+5+12
48+5+5
78+7
85+8+2+9
46+13
79+12+5
14+30
74+7+10+6
107+4
58+9+5
72+12
94+6
16+20
71+12
83+6+4
35+9
64+12
76+6
102+3
89+4+3
36+8
49+5
79+3+4+4
90+16
32+9+4
65+9
75+6+7+10
98+4
86+9
84+7+4
16+6
46+12
19+5
44+9
77+12
22+9
67+5
13+9
65+6+7
88+3
16+9
71+6
# Scratch sums. The history had `77=4` and `84=12` immediately before the
# corrected `77+4` and `84+12`; both typos are assignments to a numeric
# constant, which R rejects with an error, so they are dropped.
77 + 4
14 + 20
74 + 6 + 7 + 4 + 10
14 + 20
84 + 12
# More console scratch arithmetic; each expression only prints its value and
# nothing is assigned.
74+7+4
85+3+10+9
9+5+8+15
76+8+8+8
16+8
49+4
68+5
73+8+2
66+5+3
74+12
86+8+9
12+6
18+3.5+6+6+1
74+7
81+6
36+6
62+4
76+5
81+4+4
89+9+10
13+10+1
84+4+10
11+9
26+9
49+10+5
64+7+6+4
81+7+8+4
29+6
59+10+12
81+15+4
16+6
46+12
58+4+3
118+5
99+4
108+3.5
110/122*1
2.5*22
55+5.5
2.5*22
rm(list=ls())
# Minimal demo of interactive point labelling on a scatter plot.
x <- 1:100
y <- 1:100
# Base graphics calls are issued one after another, not chained with `+`.
# The history's `plot(...) + identify(...)` only ran because both sides get
# evaluated before the addition, and it discarded identify()'s return value.
plot(y ~ x, pch = 20)
identify(y ~ x)
# The next line in the history was a shell command, not R; left verbatim it is
# a syntax error. Run it in a terminal if it is still needed:
#   mv ~/.rstudio-desktop ~/backup-rstudio-desktop
knitr::opts_chunk$set(echo = TRUE)
## rm(list=ls())
setwd('/Users/tongtongzhang/Dropbox (IPL)/155B Machine Learning/Lecture/Lecture 2')
dat <- read.csv("TimeChange.csv")
class(dat)
View(dat)
# Scatter plot of incumbent vote share against net approval, then interactive
# labelling of clicked points with their Year.
# The history held about twenty near-identical attempts at this pair of calls.
# What is kept is the form it converged on:
#   * plot() and identify() are separate statements, not joined with `+`;
#   * identify() receives the x and y vectors in the same order as plot();
#   * the argument is spelled `labels` in full (the early `label =` attempts
#     relied on partial argument matching).
plot(dat$Incumbent_Net_Approval, dat$IncumbentVoteShare, pch = 19,
     xlab = "Net Approval", ylab = "Incumbent Vote Share")
# identify() returns the indices of the clicked points; keep them in `p`.
p <- identify(dat$Incumbent_Net_Approval, dat$IncumbentVoteShare, labels = dat$Year)
p
# Simple linear regression of incumbent vote share on net approval.
lin <- lm(IncumbentVoteShare ~ Incumbent_Net_Approval, data = dat)
plot(dat$IncumbentVoteShare ~ dat$Incumbent_Net_Approval, pch = 19, xlab = "Net Approval",
     ylab = "Incumbent Vote Share")
# Overlay the fitted regression line.
abline(lin, col = "red")
names(lin)
summary(lin$fitted.values)
summary(lin$residuals)
?predict
# New predictor values must be supplied through `newdata`. The history's first
# attempt, predict(lin, Incumbent_Net_Approval=100), passed the value as a
# stray named argument; predict() ignores it and returns the fitted values for
# the original data, so that call is removed.
predict(lin, newdata = data.frame(Incumbent_Net_Approval = 100))
# Hand-rolled prediction for a model with one predictor.
# `mod` must expose `$coefficients` with the intercept first and the slope
# second; `variable` holds the predictor value(s).
# Returns intercept + slope * variable, one value per element of `variable`.
my_pred <- function(mod, variable) {
  beta <- mod$coefficients
  intercept <- beta[1]
  slope <- beta[2]
  intercept + slope * variable
}
# Compare the hand-rolled predictor with predict() on the same new values.
new_obs <- c(-4, 100, -100, 50) # what do these values substantively mean
my_pred(lin, new_obs) # what is this returning?
predict(lin, newdata = data.frame(Incumbent_Net_Approval = new_obs))
# Scatter plot with interactive Year labels. The history repeated this
# identical plot/identify pair three times; once is enough.
plot(dat$Incumbent_Net_Approval, dat$IncumbentVoteShare, pch = 19, xlab = "Net Approval",
     ylab = "Incumbent Vote Share")
identify(dat$Incumbent_Net_Approval, dat$IncumbentVoteShare, labels = dat$Year)
# All fitted coefficients, then the second one (the slope) on its own.
lin$coefficients
lin$coefficients[2]
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE
)
setwd("/Users/tongtongzhang/Dropbox (IPL)/150B Machine Learning/Homeworks/HW_1")
# load data
ca <- read.csv("ca2006.csv")
# Resample 53 district values with replacement.
sample(ca$district, 53, replace = TRUE)
sam <- sample(ca$district, 53, replace = TRUE)
# NOTE(review): this uses the sampled district values as row indices, which is
# only a row resample if `district` is presumably 1..nrow(ca) in row order --
# confirm against the data.
ca[sam, ]
# Resample the indices 1..53 with replacement. The history's first attempt,
# sample(1:53, T), passed T positionally as `size`, so it drew a single value
# without replacement; that call is removed.
sample(1:53, replace = TRUE)
length(sample(1:53, replace = TRUE))
?ecdf
# ecdf() returns a function: call it with () to evaluate the empirical CDF.
ecdf(1:100)
x <- ecdf(1:100)
# The history tried x[5], which fails because a function cannot be subset
# with [ ]; the call form is what was intended.
x(5)
x(0.5)
# Console scratch sums; each expression only prints its value and nothing is
# assigned.
39+7+10+25
39+7+25+10+5
39+21
42+25+7+2.5
48+25+8+5
77+5+10
48+7+25
knitr::opts_chunk$set(echo = TRUE)
rm(list = ls())
setwd('/Users/tongtongzhang/Dropbox (IPL)/150B Machine Learning/04_Section')
# NYT.RData is expected to supply `nyt_list`; its four elements are unpacked
# in order as training features, training labels, test features, test labels.
load("NYT.RData")
train       <- nyt_list[[1]]
train_label <- nyt_list[[2]]
test        <- nyt_list[[3]]
test_label  <- nyt_list[[4]]
dim(train)
dim(test)
# install.packages('glmnet')  # one-time setup if glmnet is missing
library(glmnet)
# Fit glmnet with its default settings on the training data.
lasso <- glmnet(x = train, y = train_label)
# Components stored in the fitted glmnet object.
names(lasso)
# Sum of absolute coefficient values, one total per column of lasso$beta.
sum_beta <- colSums(abs(lasso$beta))
# Coefficient mass against lambda.
plot(sum_beta ~ lasso$lambda)
# lasso$df against lambda (the plot was drawn twice in the original session).
plot(lasso$df ~ lasso$lambda)
plot(lasso$df ~ lasso$lambda)
rm(list=ls())
# order() returns the positions that would sort x, here largest value first.
x <- c(1, 2, 5, 3, 6)
order(x, decreasing = TRUE)
?predict.glm
# Simulated linear relationship: y = 5x plus standard-normal noise.
x <- 1:100
y <- 5 * x + rnorm(100)
# lm() has no `family` argument. The history's lm(y~x, family="binomial")
# fitted ordinary least squares anyway and only warned that the extra argument
# was disregarded, so the argument is dropped. y is continuous, so a binomial
# glm() would not be appropriate here either.
lm(y ~ x)
# Install the text-mining packages only when they are missing. The history
# installed tm unconditionally and called library(RTextTools) before the
# package had been installed, which fails on a fresh machine.
if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}
if (!requireNamespace("RTextTools", quietly = TRUE)) {
  install.packages("RTextTools")
}
knitr::opts_chunk$set(echo = TRUE)
rm(list=ls())
#setwd('~YOUR/PATH/HERE/06_Preprocessing')
setwd("~/Dropbox (IPL)/150B Machine Learning/06_Section")
library(tm) # Framework for text mining
library(RTextTools) # a machine learning package for text classification written in R
# List the source and reader types that tm makes available.
getSources()
getReaders()
rm(list=ls())
# Add covariates from the analysis file to the replication file and write the
# result back over resume_exp.csv.
# The history read resume_exp.csv once into `d`, wiped the workspace, and read
# it again into `dd`; only the second read is kept.
setwd("/Users/tongtongzhang/Dropbox (IPL)/Job experiment/Replication_files/Data")
dd <- read.csv("resume_exp.csv", header = TRUE, stringsAsFactors = FALSE, sep = ",")
colnames(dd)
setwd("/Users/tongtongzhang/Dropbox (IPL)/Job experiment/Analysis/Data")
d <- read.csv("apply_analysis2.csv", header = TRUE, stringsAsFactors = FALSE, sep = ",") #Read in the data
colnames(d)
head(d$employers)
# Columns are copied by row position, not joined on a key, so the two files
# must at least have the same number of rows; a shorter column whose length
# divides nrow(dd) would otherwise be recycled without any error.
# NOTE(review): equal row counts do not prove equal row order -- confirm both
# files are sorted identically.
stopifnot(nrow(d) == nrow(dd))
# Each assignment appears once (the history repeated the first four).
dd$gradation <- d$gradation
dd$region <- d$region1
dd$fulltime <- d$fulltime
dd$gov_industry <- d$gov_industry
dd$comm_edu <- d$comm_edu
dd$energy <- d$energy
dd$real_estate <- d$real_estate
dd$tech_industry <- d$tech_industry
dd$finance <- d$finance
dd$auto_manufact <- d$auto_manufact
dd$health <- d$health
dd$consumer <- d$consumer
dd$agriculture <- d$agriculture
# Two source columns are renamed on the way in.
dd$service_industry <- d$other_service
dd$tech_job <- d$tech_job
dd$professional <- d$professional
dd$teacher <- d$teacher
dd$secretariat <- d$secretariat
dd$service_bluecollar <- d$service
dd$worker <- d$worker
colnames(dd)
# Frequency tables for a subset of the copied columns; exclude=NULL keeps NA
# as its own category.
table(dd$comm_edu, exclude = NULL)
table(dd$energy, exclude = NULL)
table(dd$finance, exclude = NULL)
table(dd$health, exclude = NULL)
table(dd$service_industry, exclude = NULL)
table(dd$service_bluecollar, exclude = NULL)
table(dd$worker, exclude = NULL)
table(dd$teacher, exclude = NULL)
table(dd$professional, exclude = NULL)
# Overwrites the input file resume_exp.csv in place.
setwd("/Users/tongtongzhang/Dropbox (IPL)/Job experiment/Replication_files/Data")
write.csv(dd, "resume_exp.csv", row.names = FALSE)
rm(list = ls())
setwd("/Users/tongtongzhang/Dropbox (IPL)/Job experiment/Replication_files/Data")
# Read the replication data back in from disk.
d <- read.csv(
  "resume_exp.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  sep = ","
)
## Sanity checks: dimensions, column names, and frequency tables
dim(d)
colnames(d)
table(d$region)
table(d$gov)
table(d$gov_industry)
